import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import chart_studio.plotly as py
from IPython.display import IFrame
from datetime import datetime
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode()
%matplotlib inline
# Time-series CSVs from the CSSEGISandData/COVID-19 repository. The code below
# treats the first four columns as location metadata and every remaining
# column as one day of counts.
# NOTE(review): confirm these file names are still published upstream.
confirmed_cases_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
deaths_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
cured_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"

confirmed_cases = pd.read_csv(confirmed_cases_path)
confirmed_cases.head()

deaths_data = pd.read_csv(deaths_path)
deaths_data.head()

recovered_cases = pd.read_csv(cured_path)
recovered_cases.head()

# Everything after the four leading columns is a per-day column.
days_columns = confirmed_cases.columns[4:]

# World-wide confirmed total per day as a two-column frame: Date, Count.
world_cases_growth = (
    confirmed_cases.loc[:, days_columns]
    .sum(axis=0)
    .rename_axis('Date')
    .reset_index(name='Count')
)
def isweekend(date):
    """Return True when *date* falls on a Saturday or Sunday.

    Args:
        date: Anything ``pd.to_datetime`` can parse into a single timestamp,
            e.g. a column label such as ``'1/25/20'``.

    Returns:
        bool: True for ISO weekdays 6 (Saturday) and 7 (Sunday), else False.
    """
    # ISO weekday numbering runs Monday=1 .. Sunday=7.
    return pd.to_datetime(date).isoweekday() > 5
# Flag each date: 1 when isweekend() says Saturday/Sunday, otherwise 0.
weekend_flags = world_cases_growth['Date'].map(isweekend)
world_cases_growth['isweekend'] = weekend_flags.astype(int)

# Cumulative confirmed cases: stems plus a connecting dashed line.
plt.rcParams['figure.figsize'] = [20, 10]
dates = world_cases_growth['Date']
totals = world_cases_growth['Count']
plt.stem(dates, totals, '--ro')
plt.plot(dates, totals, '--bo')
plt.title("Spread of virus per each day")
plt.xticks(dates, rotation=90)
# One y tick per observed total, so every data point has a labelled level.
plt.yticks(totals)
plt.show()
# Day-over-day change in confirmed cases. The first day has no predecessor,
# so its delta is set to the first cumulative count.
world_cases_growth['delta_confirmed'] = world_cases_growth['Count'] - world_cases_growth['Count'].shift()
world_cases_growth.loc[0, 'delta_confirmed'] = world_cases_growth.loc[0, 'Count']

plt.rcParams['figure.figsize'] = [20, 10]
# plt.stem() takes no `color` keyword (passing one raises TypeError), so the
# weekend days are highlighted with their own marker layer instead.
plt.stem(world_cases_growth['Date'], world_cases_growth['delta_confirmed'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['delta_confirmed'], '--ro')
weekend_rows = world_cases_growth[world_cases_growth['isweekend'] == 1]
plt.plot(weekend_rows['Date'], weekend_rows['delta_confirmed'], 'ks', label='Weekend')

# Print the integer delta slightly above every point.
for date, delta in zip(world_cases_growth['Date'], world_cases_growth['delta_confirmed']):
    plt.text(date, delta + 2, int(delta))

plt.title("Delta new cases of to Covid-19 identified on each day")
plt.xticks(world_cases_growth['Date'], rotation=90)
plt.legend()
plt.show()
# World-wide death total per day; the date index is dropped so the values
# line up by position with the rows of world_cases_growth.
daily_death_totals = deaths_data[days_columns].sum(axis=0)
world_cases_growth['deaths'] = daily_death_totals.reset_index(drop=True)

plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(world_cases_growth['Date'], world_cases_growth['deaths'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['deaths'], '--ro')

# Label each point with its value, lifted 50 units above the marker.
for date, deaths in zip(world_cases_growth['Date'], world_cases_growth['deaths']):
    plt.text(date, deaths + 50, deaths)

plt.title("Growth of death toll due to Covid-19 per each day")
plt.xticks(world_cases_growth['Date'], rotation=90)
plt.show()
However, the plot above shows the cumulative total (the previous day's count plus the newly added count). We can also look at the change on each individual day.
# Deaths added on each day: today's cumulative total minus yesterday's.
# Day one has no previous row, so it keeps its own cumulative value.
cumulative_deaths = world_cases_growth['deaths']
world_cases_growth['delta_deaths'] = cumulative_deaths - cumulative_deaths.shift()
world_cases_growth.loc[0, 'delta_deaths'] = world_cases_growth.loc[0, 'deaths']

plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(world_cases_growth['Date'], world_cases_growth['delta_deaths'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['delta_deaths'], '--ro')

# Annotate every point with its integer delta, 2 units above the marker.
for date, delta in zip(world_cases_growth['Date'], world_cases_growth['delta_deaths']):
    plt.text(date, delta + 2, int(delta))

plt.title("# of deaths due to Covid-19 per each day")
plt.xticks(world_cases_growth['Date'], rotation=90)
plt.show()
# World-wide recovered total per day, aligned by position with the dates.
daily_recovered_totals = recovered_cases[days_columns].sum(axis=0)
world_cases_growth['recovered'] = daily_recovered_totals.reset_index(drop=True)

plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(world_cases_growth['Date'], world_cases_growth['recovered'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['recovered'], '--go')

# Label each point with its value, lifted 50 units above the marker.
for date, recovered in zip(world_cases_growth['Date'], world_cases_growth['recovered']):
    plt.text(date, recovered + 50, recovered)

plt.title("# of people that recovered from Covid-19 virus")
plt.xticks(world_cases_growth['Date'], rotation=90)
plt.show()
# Confirmed, deaths and recovered totals stacked vertically in one figure.
plt.rcParams['figure.figsize'] = [20, 10]
plt.figure(1)

# (subplot position, column to plot, line format, legend label)
panels = [
    (311, 'Count', '--bo', 'Confirmed'),
    (312, 'deaths', '--ro', 'Deaths'),
    (313, 'recovered', '--go', 'Recovered'),
]
for position, column, line_format, label in panels:
    plt.subplot(position)
    # Each line carries a label; without one plt.legend() has nothing to show.
    plt.plot(world_cases_growth['Date'], world_cases_growth[column], line_format, label=label)
    plt.xticks(world_cases_growth['Date'], rotation=90)
    plt.legend()

plt.show()
# Three bar series drawn over each other on the same dates; later calls paint
# on top of earlier ones (confirmed, then recovered, then deaths).
bar_dates = world_cases_growth['Date']
p1 = plt.bar(bar_dates, world_cases_growth['Count'], color='yellow')
p2 = plt.bar(bar_dates, world_cases_growth['recovered'], color='g')
p3 = plt.bar(bar_dates, world_cases_growth['deaths'], color='r')
plt.xticks(bar_dates, rotation=90)
legend_handles = [p1[0], p2[0], p3[0]]
plt.legend(legend_handles, ('Confirmed', 'Recovered', 'Deaths'))
plt.xlabel("Date")
plt.title("Stacked diagram of COVID-19 cases")

# Ratio of deaths to confirmed cases on the most recent day.
world_cases_growth['deaths'].iloc[-1] / world_cases_growth['Count'].iloc[-1]

# Confirmed counts per country for every day column.
con_cases_growth = confirmed_cases.groupby('Country/Region')[days_columns].sum().reset_index()
con_cases_growth.head()
plt.show()
import requests
from bs4 import BeautifulSoup

# Fetch the GitHub directory listing of the daily reports and collect every
# link the page marks as a navigable entry.
html_response = requests.get(
    "https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports?_pjax=%23js-repo-pjax-container",
    timeout=30,
)
# Fail loudly on an HTTP error instead of parsing an error page.
html_response.raise_for_status()
soup = BeautifulSoup(html_response.content, "html.parser")
a_tags = [tag.get('href') for tag in soup.find_all(name='a', attrs={'class': 'js-navigation-open '})]

# Keep only the report files and choose the newest by the date in the file
# name, rather than trusting that the second-to-last link is the latest CSV.
# NOTE(review): assumes every report is named MM-DD-YYYY.csv — confirm upstream.
report_names = [os.path.basename(href) for href in a_tags if href and href.endswith('.csv')]
if not report_names:
    raise RuntimeError("No daily report CSV links found in the GitHub directory listing")
latest_report = max(report_names, key=lambda name: datetime.strptime(name, '%m-%d-%Y.csv'))

repo_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/" + latest_report
repo_path
# Alternative when working from a local clone of the data repository:
# daily_data = "csse_covid_19_data\\csse_covid_19_daily_reports"
# daily_data_dir = os.path.join(os.path.dirname(os.getcwd()), daily_data)
# daily_data_files = glob.glob(daily_data_dir+'/*.csv')[-1]
daily_data_files = repo_path
latest_data = pd.read_csv(daily_data_files)
latest_data.head()
# Accept the underscore spelling of the location columns as well; the rename
# is a no-op when the report already uses the slash spelling.
# NOTE(review): the underscore names are an assumption about newer reports — confirm.
latest_data = latest_data.rename(
    columns={'Province_State': 'Province/State', 'Country_Region': 'Country/Region'}
)

# Rows without a province fall back to the country name.
latest_data['Province/State'] = latest_data['Province/State'].fillna(latest_data['Country/Region'])

# Active = confirmed minus (deaths + recovered); sum(axis=1) skips missing values.
latest_data['active'] = latest_data['Confirmed'] - latest_data[['Deaths', 'Recovered']].sum(axis=1)

# Per-country totals. The columns are selected with a list: selecting with a
# bare tuple of names is no longer accepted by current pandas.
count_columns = ['Confirmed', 'Deaths', 'Recovered', 'active']
agg_latest_data = latest_data.groupby('Country/Region')[count_columns].sum().reset_index()
agg_latest_data.head()

# Keep only countries with at least one confirmed case, death and recovery.
has_all_counts = (agg_latest_data[['Confirmed', 'Deaths', 'Recovered']] > 0).all(axis=1)
agg_latest_data = agg_latest_data.loc[has_all_counts, :].reset_index(drop=True)
# Hand-maintained fallback from lower-cased country name to country code,
# consulted by get_country_code() after the two CSV-based lookups.
# NOTE(review): 'PYC' does not look like a standard three-letter country
# code — confirm the intended value for 'st. martin'.
manual_dict = {
    'mainland china': 'CHN',
    'north macedonia': 'MKD',
    'palestine': 'PSE',
    'saint barthelemy': 'FRA',
    'south korea': 'KOR',
    'st. martin': 'PYC',
    'uk': 'GBR',
    'us': 'USA',
    'vatican city': 'ITA',
}
def get_country_code(series):
    """Map country names to country codes.

    Three sources are consulted, highest priority first: the plotly
    ``2014_world_gdp_with_codes.csv`` dataset, the local tab-separated
    ``countryCodes.csv`` file, and the module-level ``manual_dict``. In both
    CSV files the first column is read as the name and the third as the code.
    Matching is case-insensitive.

    Args:
        series: Iterable of country names.

    Returns:
        list: One entry per input name, in order; ``None`` where no source
        knows the name (or the name is not a string).
    """
    def _name_to_code(frame):
        # Lower-cased first column -> third column, selected by position.
        return dict(zip(frame.iloc[:, 0].str.lower(), frame.iloc[:, 2]))

    df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
    df2 = pd.read_csv('countryCodes.csv', sep='\t')

    # Merge from lowest to highest priority so later updates win.
    lookup = dict(manual_dict)
    lookup.update(_name_to_code(df2))
    lookup.update(_name_to_code(df))

    return [
        lookup.get(name.lower()) if isinstance(name, str) else None
        for name in series
    ]
agg_latest_data['Code'] = get_country_code(agg_latest_data['Country/Region'])
from scipy.stats import rankdata

# Colour by the rank of the active count scaled into (0, 1], computed once
# and shared by both maps.
active_rank = rankdata(agg_latest_data['active']) / agg_latest_data.shape[0]

# Marker size on a log scale. Counts are clipped at zero and shifted by one so
# zero or negative active counts give size 0 instead of -inf/NaN, matching the
# log(active + 1) scaling used for the treemap.
marker_size = np.log(agg_latest_data['active'].clip(lower=0) + 1)

# Bubble map: one marker per country.
fig = px.scatter_geo(agg_latest_data, locations='Code', color=active_rank,
                     size=marker_size,
                     hover_data=['Country/Region', 'active'],
                     projection="natural earth", color_continuous_scale='YlOrRd',
                     title="Count of Active COVID-19 cases in each country")
fig.update_geos(resolution=110,
                showcountries=True, showcoastlines=False,)
# fig.show()
# fig.write_html("images/worldplot.html")
iplot(fig, filename='images/worldplot')

# Choropleth: the same colouring applied to whole country shapes.
fig = px.choropleth(agg_latest_data, locations='Code', color=active_rank,
                    hover_data=['Country/Region', 'active'],
                    projection="natural earth", color_continuous_scale='YlOrRd',
                    title="Count of Active COVID-19 cases in each country")
fig.update_geos(resolution=110,
                showcountries=True, showcoastlines=False,)
# fig.show()
# fig.write_html("images/worldplot.html")
iplot(fig, filename='images/worldplot')
Every day new people are infected, some of the infected recover, and some die. Let us look at the number of active cases on each day.
# Treemap of active cases: countries at the top level, provinces/states
# nested inside. Tile area uses log(active + 1); the raw count is in the hover.
tile_values = np.log(latest_data['active'] + 1)
fig = px.treemap(
    latest_data,
    path=['Country/Region', 'Province/State'],
    values=tile_values,
    hover_data=['active'],
    title="Province/State wise distribution of Active COVID-19 cases in Each country",
)
fig.update_layout(width=1000, height=2500)
iplot(fig, filename='images/treemap_active.html')
# Active cases per day = confirmed minus everyone who recovered or died.
closed_cases = world_cases_growth[['recovered', 'deaths']].sum(axis=1)
world_cases_growth['active_cases'] = world_cases_growth['Count'] - closed_cases

fig = px.scatter(
    world_cases_growth,
    x='Date',
    y='active_cases',
    title='Daily count of active cases of COVID 19 through out the world',
)
# fig.show()
iplot(fig, filename='active_cases')
The number of active cases roughly doubled within the span of one week: it stood at 55K on 12-Mar and went past 120K on 18-Mar.
selected_countries = ['China', 'Italy', 'France', 'Spain', 'Germany', 'Iran',
                      'Korea, South', 'US', 'United Kingdom', 'Switzerland', 'India']

# Column 1 is 'Country/Region'; the remaining positions are the last 14 day columns.
# selected_dates = [1]+list(range(4, confirmed_cases.shape[1]))
selected_dates = [1] + list(range(confirmed_cases.shape[1] - 14, confirmed_cases.shape[1]))


def _recent_totals_by_country(frame, column_positions, countries):
    """Sum the selected columns of *frame* per country, for *countries* only.

    Returns a frame with 'Country/Region' first, then one column per
    selected day, with one row per country in sorted country order.
    """
    view = frame.iloc[:, column_positions]
    view = view.loc[view['Country/Region'].isin(countries)]
    # .sum() replaces .agg([sum]) + droplevel: same totals, flat columns, and
    # no builtin callable handed to agg (deprecated in recent pandas).
    return view.groupby('Country/Region').sum().reset_index(drop=False)


confirmed_cases_view = _recent_totals_by_country(confirmed_cases, selected_dates, selected_countries)
confirmed_cases_view
recovered_cases_view = _recent_totals_by_country(recovered_cases, selected_dates, selected_countries)
recovered_cases_view
deaths_data_view = _recent_totals_by_country(deaths_data, selected_dates, selected_countries)
deaths_data_view

# Active = confirmed - (recovered + deaths), computed on the day columns only,
# so active_data_view has no 'Country/Region' column. Rows align by position
# with confirmed_cases_view.
active_data_view = confirmed_cases_view.iloc[:, 1:] - (recovered_cases_view.iloc[:, 1:] + deaths_data_view.iloc[:, 1:])
active_data_view
import plotly.graph_objects as go

# One line per selected country showing its active cases over the last 14 days.
fig = go.Figure()
date_labels = confirmed_cases_view.columns[1:]
for i in range(confirmed_cases_view.shape[0]):
    # active_data_view holds only day columns, so the values are selected by
    # the date labels. Slicing with iloc[i, 1:] dropped the first day and
    # shifted every value one date to the left.
    fig.add_trace(go.Scatter(x=date_labels,
                             y=active_data_view.iloc[i].loc[date_labels],
                             name=confirmed_cases_view.iloc[i, 0]))
fig.update_layout(title="Spread of COVID-19 in last 14 days in major Countries", height=800, width=1000)
iplot(fig, filename='country_spread')
The graph shows how well prepared a nation is for an epidemic: clearly, Italy was not expecting such a massive outbreak. China, on the other hand, showed the world that with proper care this virus can be contained.